1 Effect of UPSTM-Based Decorrelation on Feature Discovery

1.0.1 Loading the libraries

library("FRESA.CAD")
library(readxl)
library(igraph)
library(umap)
library(tsne)
library(entropy)

op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

1.1 Material and Methods

1.2 The Data


dataGI <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/GI/data.xlsx", sheet = "Sheet1"))
dataGI$ID <- NULL

table(dataGI$V2)
#> 
#>  1  2 
#> 76 76
dataSet1 <- subset(dataGI,V2==1)
class <- dataSet1$V1
dataSet1$V1 <- NULL
dataSet1$V2 <- NULL
colnames(dataSet1) <- paste(colnames(dataSet1),"WL",sep="_")
dataSet2 <- subset(dataGI,V2==2)
dataSet2$V1 <- NULL
dataSet2$V2 <- NULL
colnames(dataSet2) <- paste(colnames(dataSet2),"NBI",sep="_")
dataGI <- cbind(dataSet1,dataSet2)
dataGI$class <- 1*(class > 1)
table(dataGI$class)
#> 
#>  0  1 
#> 21 55

1.2.0.1 Standarize the names for the reporting

studyName <- "GI"
dataframe <- dataGI
outcome <- "class"

TopVariables <- 10

thro <- 0.80
cexheat = 0.15

1.3 Generaring the report

1.3.1 Libraries

Some libraries

library(psych)
library(whitening)
library("vioplot")
library("rpart")

1.3.2 Data specs

pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
rows col
76 1396
pander::pander(table(dataframe[,outcome]))
0 1
21 55

varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]

largeSet <- length(varlist) > 1500 

1.3.3 Scaling the data

Scaling and removing near zero variance columns and highly co-linear(r>0.99999) columns


  ### Some global cleaning
  sdiszero <- apply(dataframe,2,sd) > 1.0e-16
  dataframe <- dataframe[,sdiszero]

  varlist <- colnames(dataframe)[colnames(dataframe) != outcome]
  tokeep <- c(as.character(correlated_Remove(dataframe,varlist,thr=0.99999)),outcome)
  dataframe <- dataframe[,tokeep]

  varlist <- colnames(dataframe)
  varlist <- varlist[varlist != outcome]
  
  iscontinous <- sapply(apply(dataframe,2,unique),length) > 5 ## Only variables with enough samples



dataframeScaled <- FRESAScale(dataframe,method="OrderLogit")$scaledData

1.4 The heatmap of the data

numsub <- nrow(dataframe)
if (numsub > 1000) numsub <- 1000


if (!largeSet)
{

  hm <- heatMaps(data=dataframeScaled[1:numsub,],
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 xlab="Feature",
                 ylab="Sample",
                 srtCol=45,
                 srtRow=45,
                 cexCol=cexheat,
                 cexRow=cexheat
                 )
  par(op)
}

1.4.0.1 Correlation Matrix of the Data

The heat map of the data


if (!largeSet)
{

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  #cormat <- Rfast::cora(as.matrix(dataframe[,varlist]),large=TRUE)
  cormat <- cor(dataframe[,varlist],method="pearson")
  cormat[is.na(cormat)] <- 0
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Original Correlation",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

[1] 0.9999797

1.5 The decorrelation


DEdataframe <- IDeA(dataframe,verbose=TRUE,thr=thro)
#> 
#>  Included: 725 , Uni p: 0.01309165 , Uncorrelated Base: 29 , Outcome-Driven Size: 0 , Base Size: 29 
#> 
#> 
 1 <R=1.000,r=0.975,N=  185>, Top: 31( 29 )[ 1 : 31 Fa= 30 : 0.975 ]( 30 , 78 , 0 ),<|>Tot Used: 108 , Added: 78 , Zero Std: 0 , Max Cor: 1.000
#> 
 2 <R=1.000,r=0.975,N=  185>, Top: 8( 26 )[ 1 : 8 Fa= 38 : 0.975 ]( 8 , 51 , 30 ),<|>Tot Used: 153 , Added: 51 , Zero Std: 0 , Max Cor: 1.000
#> 
 3 <R=1.000,r=0.975,N=  185>, Top: 7( 15 )[ 1 : 7 Fa= 44 : 0.975 ]( 7 , 30 , 38 ),<|>Tot Used: 178 , Added: 30 , Zero Std: 0 , Max Cor: 0.999
#> 
 4 <R=0.999,r=0.975,N=  185>, Top: 5( 6 )[ 1 : 5 Fa= 48 : 0.975 ]( 4 , 24 , 44 ),<|>Tot Used: 181 , Added: 24 , Zero Std: 0 , Max Cor: 0.975
#> 
 5 <R=0.975,r=0.962,N=  185>, Top: 53( 1 )[ 1 : 53 Fa= 80 : 0.962 ]( 51 , 59 , 48 ),<|>Tot Used: 247 , Added: 59 , Zero Std: 0 , Max Cor: 0.974
#> 
 6 <R=0.974,r=0.962,N=  185>, Top: 4( 1 )[ 1 : 4 Fa= 83 : 0.962 ]( 4 , 4 , 80 ),<|>Tot Used: 251 , Added: 4 , Zero Std: 0 , Max Cor: 0.962
#> 
 7 <R=0.962,r=0.931,N=  222>, Top: 75[ 3 ]( 1 )=[ 2 : 75 Fa= 127 : 0.942 ]( 72 , 105 , 83 ),<|>Tot Used: 368 , Added: 105 , Zero Std: 0 , Max Cor: 0.994
#> 
 8 <R=0.994,r=0.947,N=  222>, Top: 9( 1 )[ 1 : 9 Fa= 135 : 0.947 ]( 9 , 9 , 127 ),<|>Tot Used: 383 , Added: 9 , Zero Std: 0 , Max Cor: 0.941
#> 
 9 <R=0.941,r=0.871,N=  302>, Top: 93( 1 )=[ 2 : 93 Fa= 182 : 0.911 ]( 87 , 139 , 135 ),<|>Tot Used: 523 , Added: 139 , Zero Std: 0 , Max Cor: 0.960
#> 
 10 <R=0.960,r=0.880,N=  302>, Top: 20( 1 )[ 1 : 20 Fa= 191 : 0.880 ]( 18 , 26 , 182 ),<|>Tot Used: 545 , Added: 26 , Zero Std: 0 , Max Cor: 0.915
#> 
 11 <R=0.915,r=0.807,N=  245>, Top: 73( 5 )[ 1 : 73 Fa= 219 : 0.807 ]( 71 , 121 , 191 ),<|>Tot Used: 593 , Added: 121 , Zero Std: 0 , Max Cor: 0.926
#> 
 12 <R=0.926,r=0.813,N=  245>, Top: 17( 3 )[ 1 : 17 Fa= 226 : 0.813 ]( 16 , 20 , 219 ),<|>Tot Used: 610 , Added: 20 , Zero Std: 0 , Max Cor: 0.851
#> 
 13 <R=0.851,r=0.800,N=   49>, Top: 23( 1 )[ 1 : 23 Fa= 234 : 0.800 ]( 23 , 26 , 226 ),<|>Tot Used: 626 , Added: 26 , Zero Std: 0 , Max Cor: 0.799
#> 
 14 <R=0.799,r=0.800,N=   49>
#> 
 [ 14 ], 0.7986822 Decor Dimension: 626 Nused: 626 . Cor to Base: 244 , ABase: 17 , Outcome Base: 0 
#> 
varlistc <- colnames(DEdataframe)[colnames(DEdataframe) != outcome]

pander::pander(sum(apply(dataframe[,varlist],2,var)))

7.73e+08

pander::pander(sum(apply(DEdataframe[,varlistc],2,var)))

1.8e+08

pander::pander(entropy(discretize(unlist(dataframe[,varlist]), 256)))

0.306

pander::pander(entropy(discretize(unlist(DEdataframe[,varlistc]), 256)))

0.218

1.5.1 The decorrelation matrix


if (!largeSet)
{

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  
  UPSTM <- attr(DEdataframe,"UPSTM")
  
  gplots::heatmap.2(1.0*(abs(UPSTM)>0),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Decorrelation matrix",
                    cexRow = cexheat,
                    cexCol = cexheat,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="|Beta|>0",
                    xlab="Output Feature", ylab="Input Feature")
  
  par(op)
}

1.6 The heatmap of the decorrelated data

if (!largeSet)
{

  hm <- heatMaps(data=DEdataframe[1:numsub,],
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 cexRow = cexheat,
                 cexCol = cexheat,
                 srtCol=45,
                 srtRow=45,
                 xlab="Feature",
                 ylab="Sample")
  par(op)
}

1.7 The correlation matrix after decorrelation

if (!largeSet)
{

  cormat <- cor(DEdataframe[,varlistc],method="pearson")
  cormat[is.na(cormat)] <- 0
  
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Correlation after IDeA",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  
  par(op)
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

[1] 0.9419448

1.8 U-MAP Visualization of features

1.8.1 The UMAP based on LASSO on Raw Data


if (nrow(dataframe) < 1000)
{
  classes <- unique(dataframe[1:numsub,outcome])
  raincolors <- rainbow(length(classes))
  names(raincolors) <- classes
  datasetframe.umap = umap(scale(dataframe[1:numsub,varlist]),n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: Original",t='n')
  text(datasetframe.umap$layout,labels=dataframe[1:numsub,outcome],col=raincolors[dataframe[1:numsub,outcome]+1])
}

1.8.2 The decorralted UMAP

if (nrow(dataframe) < 1000)
{

  datasetframe.umap = umap(scale(DEdataframe[1:numsub,varlistc]),n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: After IDeA",t='n')
  text(datasetframe.umap$layout,labels=DEdataframe[1:numsub,outcome],col=raincolors[DEdataframe[1:numsub,outcome]+1])
}

1.9 Univariate Analysis

1.9.1 Univariate



univarRAW <- uniRankVar(varlist,
               paste(outcome,"~1"),
               outcome,
               dataframe,
               rankingTest="AUC")

100 : V102_WL 200 : V288_WL 300 : V535_WL 400 : V635_WL 500 : V37_NBI
600 : V137_NBI 700 : V470_NBI




univarDe <- uniRankVar(varlistc,
               paste(outcome,"~1"),
               outcome,
               DEdataframe,
               rankingTest="AUC",
               )

100 : La_V102_WL 200 : La_V288_WL 300 : La_V535_WL 400 : La_V635_WL 500 : La_V37_NBI
600 : La_V137_NBI 700 : La_V470_NBI

1.9.2 Final Table


univariate_columns <- c("caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC")

##topfive
topvar <- c(1:length(varlist)) <= TopVariables
pander::pander(univarRAW$orderframe[topvar,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
V172_WL 3.55e+03 1.78e+03 1046.667 537.2409 0.718095 0.933
V220_NBI 2.01e+02 1.20e+02 51.524 27.8220 0.747592 0.929
V220_WL 1.96e+02 1.07e+02 52.381 42.7370 0.097268 0.927
V477_NBI 6.18e-02 2.98e-02 0.149 0.1717 0.000358 0.925
V169_NBI 1.26e+03 8.24e+02 346.619 198.5476 0.350000 0.920
V196_NBI 4.52e+02 2.51e+02 134.238 66.3226 0.410564 0.920
V182_NBI 3.44e+02 2.17e+02 95.190 48.8412 0.793090 0.915
V470_NBI 3.79e-01 1.34e-01 0.188 0.0682 0.948083 0.913
V182_WL 3.17e+02 1.69e+02 96.476 87.3691 0.142781 0.912
V474_NBI 3.40e+00 3.13e-01 2.680 0.5481 0.222068 0.912


topLAvar <- univarDe$orderframe$Name[str_detect(univarDe$orderframe$Name,"La_")]
topLAvar <- unique(c(univarDe$orderframe$Name[topvar],topLAvar[1:as.integer(TopVariables/2)]))
finalTable <- univarDe$orderframe[topLAvar,univariate_columns]

theLaVar <- rownames(finalTable)[str_detect(rownames(finalTable),"La_")]

pander::pander(univarDe$orderframe[topLAvar,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
V474_NBI 3.40e+00 3.13e-01 2.68e+00 5.48e-01 0.2221 0.912
V169_WL 1.20e+03 6.66e+02 4.03e+02 4.20e+02 0.0543 0.897
V474_WL 3.19e+00 4.57e-01 2.36e+00 5.29e-01 0.9972 0.882
V4_WL 1.67e+03 9.90e+02 6.00e+02 4.77e+02 0.0868 0.874
La_V69_WL 1.03e-03 1.66e-03 -1.02e-03 1.56e-03 0.3074 0.872
V473_NBI 1.22e-01 4.19e-02 2.12e-01 1.67e-01 0.0188 0.865
V485_WL 3.14e+00 4.63e-01 2.44e+00 4.51e-01 0.5556 0.853
V473_WL 1.57e-01 5.21e-02 2.82e-01 1.38e-01 0.3081 0.850
V198_NBI 3.83e+02 2.17e+02 1.54e+02 9.18e+01 0.3333 0.844
La_V200_NBI -1.08e+03 2.09e+03 1.03e+03 2.22e+03 0.0393 0.835
La_V91_NBI 6.01e-03 4.07e-03 1.88e-03 2.59e-03 0.9531 0.825
La_V478_WL 9.13e-01 2.30e-02 9.48e-01 3.09e-02 0.9838 0.824
La_V296_NBI -3.21e+02 1.47e+03 3.76e+02 9.94e+02 0.0346 0.810

dc <- getLatentCoefficients(DEdataframe)
fscores <- attr(DEdataframe,"fscore")

theSigDc <- dc[theLaVar]
names(theSigDc) <- NULL
theSigDc <- unique(names(unlist(theSigDc)))


theFormulas <- dc[rownames(finalTable)]
deFromula <- character(length(theFormulas))
names(deFromula) <- rownames(finalTable)

pander::pander(c(mean=mean(sapply(dc,length)),total=length(dc),fraction=length(dc)/(ncol(dataframe)-1)))
mean total fraction
2.35 546 0.748


allSigvars <- names(dc)



dx <- names(deFromula)[1]
for (dx in names(deFromula))
{
  coef <- theFormulas[[dx]]
  cname <- names(theFormulas[[dx]])
  names(cname) <- cname
  for (cf in names(coef))
  {
    if (cf != dx)
    {
      if (coef[cf]>0)
      {
        deFromula[dx] <- paste(deFromula[dx],
                               sprintf("+ %5.3f*%s",coef[cf],cname[cf]))
      }
      else
      {
        deFromula[dx] <- paste(deFromula[dx],
                               sprintf("%5.3f*%s",coef[cf],cname[cf]))
      }
    }
  }
}

finalTable <- rbind(finalTable,univarRAW$orderframe[theSigDc[!(theSigDc %in% rownames(finalTable))],univariate_columns])


orgnamez <- rownames(finalTable)
orgnamez <- str_remove_all(orgnamez,"La_")
finalTable$RAWAUC <- univarRAW$orderframe[orgnamez,"ROCAUC"]
finalTable$DecorFormula <- deFromula[rownames(finalTable)]
finalTable$fscores <- fscores[rownames(finalTable)]

Final_Columns <- c("DecorFormula","caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC","RAWAUC","fscores")

finalTable <- finalTable[order(-finalTable$ROCAUC),]
pander::pander(finalTable[,Final_Columns])
  DecorFormula caseMean caseStd controlMean controlStd controlKSP ROCAUC RAWAUC fscores
V474_NBI 3.40e+00 3.13e-01 2.68e+00 5.48e-01 0.2221 0.912 0.912 NA
V169_WL 1.20e+03 6.66e+02 4.03e+02 4.20e+02 0.0543 0.897 0.897 9
V474_WL 3.19e+00 4.57e-01 2.36e+00 5.29e-01 0.9972 0.882 0.882 NA
V4_WL 1.67e+03 9.90e+02 6.00e+02 4.77e+02 0.0868 0.874 0.874 4
La_V69_WL -1.863V47_WL + 1.000V69_WL 1.03e-03 1.66e-03 -1.02e-03 1.56e-03 0.3074 0.872 0.617 -1
V473_NBI 1.22e-01 4.19e-02 2.12e-01 1.67e-01 0.0188 0.865 0.865 2
V485_WL 3.14e+00 4.63e-01 2.44e+00 4.51e-01 0.5556 0.853 0.853 1
V473_WL 1.57e-01 5.21e-02 2.82e-01 1.38e-01 0.3081 0.850 0.850 1
V198_NBI 3.83e+02 2.17e+02 1.54e+02 9.18e+01 0.3333 0.844 0.844 13
La_V200_NBI -0.862V184_NBI + 1.000V200_NBI -1.08e+03 2.09e+03 1.03e+03 2.22e+03 0.0393 0.835 0.763 -1
La_V91_NBI -2.866V47_NBI + 1.000V91_NBI 6.01e-03 4.07e-03 1.88e-03 2.59e-03 0.9531 0.825 0.578 1
La_V478_WL + 0.130V475_WL + 1.000V478_WL 9.13e-01 2.30e-02 9.48e-01 3.09e-02 0.9838 0.824 0.788 1
V184_NBI NA 1.75e+04 1.38e+04 7.48e+03 6.94e+03 0.0334 0.815 0.815 NA
La_V296_NBI -13.104V198_NBI + 1.000V296_NBI -3.21e+02 1.47e+03 3.76e+02 9.94e+02 0.0346 0.810 0.758 -1
V478_WL NA 8.40e-01 5.41e-02 8.90e-01 6.37e-02 0.4143 0.788 0.788 NA
V200_NBI NA 1.41e+04 1.22e+04 7.48e+03 7.85e+03 0.0148 0.763 0.763 NA
V296_NBI NA 4.70e+03 3.29e+03 2.40e+03 2.04e+03 0.1764 0.758 0.758 NA
V475_WL NA 5.66e-01 3.42e-01 4.48e-01 5.50e-01 0.0297 0.730 0.730 3
V69_WL NA 1.58e-02 5.93e-03 1.34e-02 5.45e-03 0.7833 0.617 0.617 NA
V91_NBI NA 2.49e-02 7.61e-03 2.23e-02 8.15e-03 0.9484 0.578 0.578 NA
V47_NBI NA 6.61e-03 2.17e-03 7.13e-03 2.65e-03 0.6695 0.558 0.558 6
V47_WL NA 7.93e-03 3.09e-03 7.72e-03 2.76e-03 0.9876 0.492 0.492 NA

1.10 Comparing IDeA vs PCA vs EFA

1.10.1 PCA

featuresnames <- colnames(dataframe)[colnames(dataframe) != outcome]
pc <- prcomp(dataframe[,iscontinous],center = TRUE,tol=0.002)   #principal components
predPCA <- predict(pc,dataframe[,iscontinous])
PCAdataframe <- as.data.frame(cbind(predPCA,dataframe[,!iscontinous]))
colnames(PCAdataframe) <- c(colnames(predPCA),colnames(dataframe)[!iscontinous]) 
#plot(PCAdataframe[,colnames(PCAdataframe)!=outcome],col=dataframe[,outcome],cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)

#pander::pander(pc$rotation)


PCACor <- cor(PCAdataframe[,colnames(PCAdataframe) != outcome])


  gplots::heatmap.2(abs(PCACor),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "PCA Correlation",
                    cexRow = 0.5,
                    cexCol = 0.5,
                     srtCol=45,
                     srtRow= -45,
                    key.title=NA,
                    key.xlab="Pearson Correlation",
                    xlab="Feature", ylab="Feature")

1.10.2 EFA


EFAdataframe <- dataframeScaled

if (length(iscontinous) < 2000)
{
  topred <- min(length(iscontinous),nrow(dataframeScaled),ncol(predPCA)/2)
  if (topred < 2) topred <- 2
  
  uls <- fa(dataframeScaled[,iscontinous],nfactors=topred,rotate="varimax",warnings=FALSE)  # EFA analysis
  predEFA <- predict(uls,dataframeScaled[,iscontinous])
  EFAdataframe <- as.data.frame(cbind(predEFA,dataframeScaled[,!iscontinous]))
  colnames(EFAdataframe) <- c(colnames(predEFA),colnames(dataframeScaled)[!iscontinous]) 


  
  EFACor <- cor(EFAdataframe[,colnames(EFAdataframe) != outcome])
  
  
    gplots::heatmap.2(abs(EFACor),
                      trace = "none",
    #                  scale = "row",
                      mar = c(5,5),
                      col=rev(heat.colors(5)),
                      main = "EFA Correlation",
                      cexRow = 0.5,
                      cexCol = 0.5,
                       srtCol=45,
                       srtRow= -45,
                      key.title=NA,
                      key.xlab="Pearson Correlation",
                      xlab="Feature", ylab="Feature")
}

1.11 Effect on CAR modeling

par(op)
par(xpd = TRUE)
dataframe[,outcome] <- factor(dataframe[,outcome])
rawmodel <- rpart(paste(outcome,"~."),dataframe,control=rpart.control(maxdepth=3))
pr <- predict(rawmodel,dataframe,type = "class")

  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(rawmodel,main="Raw",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(rawmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,dataframe[,outcome]==0))
  }


pander::pander(table(dataframe[,outcome],pr))
  0 1
0 17 4
1 3 52
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.7368 0.6232 0.831
    tp 0.7237 0.6091 0.820
    se 0.9455 0.8488 0.989
    sp 0.8095 0.5809 0.946
    diag.ac 0.9079 0.8194 0.962
    diag.or 73.6667 14.9632 362.674
    nndx 1.3245 1.0705 2.327
    youden 0.7550 0.4297 0.934
    pv.pos 0.9286 0.8271 0.980
    pv.neg 0.8500 0.6211 0.968
    lr.pos 4.9636 2.0506 12.015
    lr.neg 0.0674 0.0220 0.206
    p.rout 0.2632 0.1687 0.377
    p.rin 0.7368 0.6232 0.831
    p.tpdn 0.1905 0.0545 0.419
    p.tndp 0.0545 0.0114 0.151
    p.dntp 0.0714 0.0198 0.173
    p.dptn 0.1500 0.0321 0.379
  • tab:

      Outcome + Outcome - Total
    Test + 52 4 56
    Test - 3 17 20
    Total 55 21 76
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.908 0.819 0.962
3 se 0.945 0.849 0.989
4 sp 0.810 0.581 0.946
6 diag.or 73.667 14.963 362.674

par(op)
par(xpd = TRUE)
DEdataframe[,outcome] <- factor(DEdataframe[,outcome])
IDeAmodel <- rpart(paste(outcome,"~."),DEdataframe,control=rpart.control(maxdepth=3))
pr <- predict(IDeAmodel,DEdataframe,type = "class")

  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(IDeAmodel,main="IDeA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(IDeAmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,DEdataframe[,outcome]==0))
  }

pander::pander(table(DEdataframe[,outcome],pr))
  0 1
0 18 3
1 1 54
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.7500 0.63744 8.42e-01
    tp 0.7237 0.60914 8.20e-01
    se 0.9818 0.90281 1.00e+00
    sp 0.8571 0.63658 9.70e-01
    diag.ac 0.9474 0.87069 9.85e-01
    diag.or 324.0000 31.67589 3.31e+03
    nndx 1.1920 1.03194 1.85e+00
    youden 0.8390 0.53938 9.69e-01
    pv.pos 0.9474 0.85380 9.89e-01
    pv.neg 0.9474 0.73972 9.99e-01
    lr.pos 6.8727 2.40921 1.96e+01
    lr.neg 0.0212 0.00302 1.49e-01
    p.rout 0.2500 0.15772 3.63e-01
    p.rin 0.7500 0.63744 8.42e-01
    p.tpdn 0.1429 0.03049 3.63e-01
    p.tndp 0.0182 0.00046 9.72e-02
    p.dntp 0.0526 0.01099 1.46e-01
    p.dptn 0.0526 0.00133 2.60e-01
  • tab:

      Outcome + Outcome - Total
    Test + 54 3 57
    Test - 1 18 19
    Total 55 21 76
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.947 0.871 0.985
3 se 0.982 0.903 1.000
4 sp 0.857 0.637 0.970
6 diag.or 324.000 31.676 3314.066

par(op)
par(xpd = TRUE)
PCAdataframe[,outcome] <- factor(PCAdataframe[,outcome])
PCAmodel <- rpart(paste(outcome,"~."),PCAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(PCAmodel,PCAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
  plot(PCAmodel,main="PCA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
  text(PCAmodel, use.n = TRUE,cex=0.75)
  ptab <- epiR::epi.tests(table(pr==0,PCAdataframe[,outcome]==0))
}

pander::pander(table(PCAdataframe[,outcome],pr))
  0 1
0 18 3
1 6 49
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.6842 0.5675 0.786
    tp 0.7237 0.6091 0.820
    se 0.8909 0.7775 0.959
    sp 0.8571 0.6366 0.970
    diag.ac 0.8816 0.7871 0.944
    diag.or 49.0000 11.0713 216.866
    nndx 1.3368 1.0771 2.415
    youden 0.7481 0.4141 0.928
    pv.pos 0.9423 0.8405 0.988
    pv.neg 0.7500 0.5329 0.902
    lr.pos 6.2364 2.1786 17.852
    lr.neg 0.1273 0.0586 0.276
    p.rout 0.3158 0.2139 0.433
    p.rin 0.6842 0.5675 0.786
    p.tpdn 0.1429 0.0305 0.363
    p.tndp 0.1091 0.0411 0.222
    p.dntp 0.0577 0.0121 0.159
    p.dptn 0.2500 0.0977 0.467
  • tab:

      Outcome + Outcome - Total
    Test + 49 3 52
    Test - 6 18 24
    Total 55 21 76
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.882 0.787 0.944
3 se 0.891 0.778 0.959
4 sp 0.857 0.637 0.970
6 diag.or 49.000 11.071 216.866


par(op)

1.11.1 EFA


  EFAdataframe[,outcome] <- factor(EFAdataframe[,outcome])
  EFAmodel <- rpart(paste(outcome,"~."),EFAdataframe,control=rpart.control(maxdepth=3))
  pr <- predict(EFAmodel,EFAdataframe,type = "class")
  
  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(EFAmodel,main="EFA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(EFAmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,EFAdataframe[,outcome]==0))
  }


  pander::pander(table(EFAdataframe[,outcome],pr))
  0 1
0 19 2
1 4 51
  pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.6974 0.5813 0.798
    tp 0.7237 0.6091 0.820
    se 0.9273 0.8241 0.980
    sp 0.9048 0.6962 0.988
    diag.ac 0.9211 0.8360 0.970
    diag.or 121.1250 20.4827 716.278
    nndx 1.2019 1.0330 1.922
    youden 0.8320 0.5204 0.968
    pv.pos 0.9623 0.8702 0.995
    pv.neg 0.8261 0.6122 0.950
    lr.pos 9.7364 2.6001 36.459
    lr.neg 0.0804 0.0310 0.209
    p.rout 0.3026 0.2025 0.419
    p.rin 0.6974 0.5813 0.798
    p.tpdn 0.0952 0.0117 0.304
    p.tndp 0.0727 0.0202 0.176
    p.dntp 0.0377 0.0046 0.130
    p.dptn 0.1739 0.0495 0.388
  • tab:

      Outcome + Outcome - Total
    Test + 51 2 53
    Test - 4 19 23
    Total 55 21 76
  • method: exact

  • digits: 2

  • conf.level: 0.95

  pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.921 0.836 0.970
3 se 0.927 0.824 0.980
4 sp 0.905 0.696 0.988
6 diag.or 121.125 20.483 716.278
  par(op)